Gráficos de barras animados en R

El objetivo de esta publicación es explicar cómo construir gráficos animados de barras usando R.

Paquetes

Los paquetes que se requieren para construir gráficos animados en R son:

  • ggplot2
  • gganimate

Si bien los paquetes anteriores son los esenciales, también hemos utilizado tidyverse, janitor y scales en este proyecto para la manipulación, limpieza y formateo de datos.

Datos

El conjunto de datos original utilizado para este proyecto proviene de DataIntelligence Chile.

Sobre los datos:

Estos datos contienen el valor de los casos acumulados de Covid-19 en cuatro comunas del norte de Chile, desde el inicio del brote hasta mayo de 2020.

Preprocesamiento de datos:

Usaremos el siguiente código para preparar nuestros datos con el formato deseado. En concreto, limpiamos los nombres de las columnas, convertimos los valores a formato numérico y pasamos los datos de formato ancho a formato largo con la función gather() de tidyr. Los datos ordenados se guardan en un nuevo archivo csv, barras_con_4_comunas_tidy10.csv, para su uso posterior.

library(tibble)
library(readr) 
library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.0     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v purrr   0.3.4     v forcats 0.5.0
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# Read the raw cumulative-case table. read_csv2() expects ';' as the field
# delimiter and ',' as the decimal mark.
barras_con_4_comunas <- read_csv2("con_4_comunas.csv")
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
# Restrict the table to its first 61 columns (the label column followed by
# the "YR" columns) and to its first four rows, one per commune.
barras_con_4_comunas <- select(barras_con_4_comunas, 1:61)
barras_con_4_comunas <- barras_con_4_comunas[seq_len(4), ]

# Wide -> long reshape:
#   1. coerce every "YR" column from character to numeric,
#   2. stack columns 2-61 into a `date` key column and an "Acumulados"
#      value column,
#   3. normalise the column names with janitor,
#   4. keep characters 3-7 of each former column name (the digits that
#      follow the "YR" prefix) as a numeric `date`.
barras_con_4_comunas_tidy <- barras_con_4_comunas %>%
  mutate_at(vars(contains("YR")), as.numeric) %>%
  gather(key = date, value = "Acumulados", 2:61) %>%
  janitor::clean_names() %>%
  mutate(
    date = as.numeric(stringr::str_sub(date, start = 3, end = 7))
  )

# Save the long-format table for later use.
write_csv(barras_con_4_comunas_tidy, "barras_con_4_comunas_tidy10.csv")
library(stringr)
library(gganimate)
library(png)
# Turn off warning reporting (a negative `warn` value makes R ignore warnings).
options(warn = - 1)  
# NOTE(review): read_csv() splits fields on ','. The recorded output for this
# call shows every ';'-separated field landing in one character column, while
# the read_csv2() call earlier in this file is applied to the same file --
# confirm whether read_csv2() was intended here as well.
covid_acum <- read_csv("con_4_comunas.csv")
## Parsed with column specification:
## cols(
##   `;YR43893;YR44259;YR44258;YR43896;YR43897;YR43898;YR43899;YR43900;YR43901;YR43902;YR43903;YR43904;YR43905;YR43906;YR43907;YR43908;YR43909;YR43910;YR43911;YR43912;YR43913;YR43914;YR43915;YR43916;YR43917;YR43918;YR43919;YR43920;YR43921;YR43922;YR43923;YR43924;YR43925;YR43926;YR43927;YR43928;YR43929;YR43930;YR43931;YR43932;YR43933;YR43934;YR43935;YR43936;YR43937;YR43938;YR43939;YR43940;YR43941;YR43942;YR43943;YR43944;YR43945;YR43946;YR43947;YR43948;YR43949;YR43950;YR43951;YR43952` = col_character()
## )
str(covid_acum)
## tibble [7 x 1] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ;YR43893;YR44259;YR44258;YR43896;YR43897;YR43898;YR43899;YR43900;YR43901;YR43902;YR43903;YR43904;YR43905;YR43906;YR43907;YR43908;YR43909;YR43910;YR43911;YR43912;YR43913;YR43914;YR43915;YR43916;YR43917;YR43918;YR43919;YR43920;YR43921;YR43922;YR43923;YR43924;YR43925;YR43926;YR43927;YR43928;YR43929;YR43930;YR43931;YR43932;YR43933;YR43934;YR43935;YR43936;YR43937;YR43938;YR43939;YR43940;YR43941;YR43942;YR43943;YR43944;YR43945;YR43946;YR43947;YR43948;YR43949;YR43950;YR43951;YR43952: chr [1:7] "Antofagasta;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;2;6;7;7;9;15;16;17;18;22;29;29;29;32;38;40;40;45;47;47;49;59;72;77;"| __truncated__ "Calama;0;0;0;0;0;0;0;0;0;0;0;2;2;2;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;5;5;6;6;7;8;9;12;12;14;19;19;19;20;20;20;23;"| __truncated__ "Maria Elena;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;2;3;4;4;6;6;6"| __truncated__ "Mejillones;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;1;3;3;3;6;8;9;11;14;15;16;23"| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `;YR43893;YR44259;YR44258;YR43896;YR43897;YR43898;YR43899;YR43900;YR43901;YR43902;YR43903;YR43904;YR43905;YR43906;YR43907;YR43908;YR43909;YR43910;YR43911;YR43912;YR43913;YR43914;YR43915;YR43916;YR43917;YR43918;YR43919;YR43920;YR43921;YR43922;YR43923;YR43924;YR43925;YR43926;YR43927;YR43928;YR43929;YR43930;YR43931;YR43932;YR43933;YR43934;YR43935;YR43936;YR43937;YR43938;YR43939;YR43940;YR43941;YR43942;YR43943;YR43944;YR43945;YR43946;YR43947;YR43948;YR43949;YR43950;YR43951;YR43952` = col_character()
##   .. )
head(covid_acum)
## # A tibble: 6 x 1
##   `;YR43893;YR44259;YR44258;YR43896;YR43897;YR43898;YR43899;YR43900;YR43901;YR4~
##   <chr>                                                                         
## 1 Antofagasta;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;2;6;7;7;9;15;16;17;18;22;29;29;29~
## 2 Calama;0;0;0;0;0;0;0;0;0;0;0;2;2;2;2;2;2;2;2;2;2;2;2;2;3;3;3;3;3;3;5;5;6;6;7;~
## 3 Maria Elena;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0~
## 4 Mejillones;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;~
## 5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;                  
## 6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
# Selecionemos las columnas requeridas:


# Filtramos sólo las filas de comunas:






# Load the raw GDP export with base read.csv() and inspect its structure.
# NOTE(review): the first column name prints as "ï..Series.Name", which looks
# like an unhandled UTF-8 byte-order mark -- confirm; if so,
# fileEncoding = "UTF-8-BOM" would give a clean name. That column is not among
# the ones kept by the later select(3:15).
gdp <- read.csv("GDP_Data.csv")
str(gdp)
## 'data.frame':    269 obs. of  16 variables:
##  $ ï..Series.Name: chr  "GDP (current US$)" "GDP (current US$)" "GDP (current US$)" "GDP (current US$)" ...
##  $ Series.Code   : chr  "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" ...
##  $ Country.Name  : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Country.Code  : chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ X1990..YR1990.: chr  ".." "2028553750" "62045099642.7774" ".." ...
##  $ X2000..YR2000.: chr  ".." "3480355258.04122" "54790245600.5846" ".." ...
##  $ X2009..YR2009.: chr  "12439087076.7667" "12044208085.864" "137211039898.193" "678000000" ...
##  $ X2010..YR2010.: chr  "15856574731.4411" "11926957254.6288" "161207268655.392" "576000000" ...
##  $ X2011..YR2011.: chr  "17804292964.1045" "12890866742.6533" "200019057307.655" "574000000" ...
##  $ X2012..YR2012.: chr  "19907317065.6667" "12319784886.2038" "209058991952.125" "644000000" ...
##  $ X2013..YR2013.: chr  "20561069558.2152" "12776280961.155" "209755003250.664" "641000000" ...
##  $ X2014..YR2014.: chr  "20484885119.7348" "13228247844.1247" "213810022462.428" "643000000" ...
##  $ X2015..YR2015.: chr  "19907111418.9938" "11386931489.7968" "165979277276.907" "661000000" ...
##  $ X2016..YR2016.: chr  "19046357714.4928" "11883682170.8236" "160129866569.935" "653000000" ...
##  $ X2017..YR2017.: chr  "19543976895.4248" "13038538300.2644" "167555280113.181" "634000000" ...
##  $ X2018..YR2018.: chr  ".." ".." ".." ".." ...
head(gdp)
##      ï..Series.Name    Series.Code   Country.Name Country.Code   X1990..YR1990.
## 1 GDP (current US$) NY.GDP.MKTP.CD    Afghanistan          AFG               ..
## 2 GDP (current US$) NY.GDP.MKTP.CD        Albania          ALB       2028553750
## 3 GDP (current US$) NY.GDP.MKTP.CD        Algeria          DZA 62045099642.7774
## 4 GDP (current US$) NY.GDP.MKTP.CD American Samoa          ASM               ..
## 5 GDP (current US$) NY.GDP.MKTP.CD        Andorra          AND 1029048481.88051
## 6 GDP (current US$) NY.GDP.MKTP.CD         Angola          AGO 11228764963.1618
##     X2000..YR2000.   X2009..YR2009.   X2010..YR2010.   X2011..YR2011.
## 1               .. 12439087076.7667 15856574731.4411 17804292964.1045
## 2 3480355258.04122  12044208085.864 11926957254.6288 12890866742.6533
## 3 54790245600.5846 137211039898.193 161207268655.392 200019057307.655
## 4               ..        678000000        576000000        574000000
## 5 1434429703.33518 3660530702.97305 3355695364.23841 3442062830.13622
## 6 9129594818.60749 70307163678.1895 83799496611.6049  111789686464.26
##     X2012..YR2012.   X2013..YR2013.   X2014..YR2014.   X2015..YR2015.
## 1 19907317065.6667 20561069558.2152 20484885119.7348 19907111418.9938
## 2 12319784886.2038  12776280961.155 13228247844.1247 11386931489.7968
## 3 209058991952.125 209755003250.664 213810022462.428 165979277276.907
## 4        644000000        641000000        643000000        661000000
## 5 3164615186.94591 3281585236.32501 3350736367.25488 2811489408.89431
## 6 128052853643.447 136709862831.308 145712200312.505 116193649124.475
##     X2016..YR2016.   X2017..YR2017. X2018..YR2018.
## 1 19046357714.4928 19543976895.4248             ..
## 2 11883682170.8236 13038538300.2644             ..
## 3 160129866569.935 167555280113.181             ..
## 4        653000000        634000000             ..
## 5 2877311946.90265 3012914131.16971             ..
## 6 101123851090.473  122123822333.73             ..
# Keep columns 3-15: country name, country code and the yearly GDP columns
# from 1990 through 2017 (the trailing 2018 column is left out).
gdp <- gdp %>% select(3:15)

# Keep only the first 217 rows (the country rows).
gdp <- gdp[1:217, ]

# Wide -> long reshape of the yearly GDP columns.
gdp_tidy <- gdp %>%
  # The raw columns are character and use ".." for missing values, so
  # as.numeric() turns those entries into NA.
  mutate_at(vars(contains("YR")), as.numeric) %>%
  # Stack the 11 year columns (positions 3-13) into `year` / `value`.
  gather(year, value, 3:13) %>%
  # Normalise the column names (Country.Name -> country_name, ...).
  janitor::clean_names() %>%
  # The key values look like "X1990..YR1990." because read.csv() prefixes an
  # "X" to names that start with a digit. A fixed 1-4 slice therefore yields
  # "X199", which as.numeric() converts to NA for every row. Extract the first
  # run of four digits instead, so the year is parsed whatever the prefix is.
  mutate(year = as.numeric(stringr::str_extract(year, "[0-9]{4}")))


summary(gdp_tidy)
##  country_name       country_code            year          value          
##  Length:2387        Length:2387        Min.   : NA    Min.   :8.824e+06  
##  Class :character   Class :character   1st Qu.: NA    1st Qu.:4.435e+09  
##  Mode  :character   Mode  :character   Median : NA    Median :2.020e+10  
##                                        Mean   :NaN    Mean   :3.241e+11  
##                                        3rd Qu.: NA    3rd Qu.:1.384e+11  
##                                        Max.   : NA    Max.   :1.939e+13  
##                                        NA's   :2387   NA's   :193
str(gdp_tidy)
## 'data.frame':    2387 obs. of  4 variables:
##  $ country_name: chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ country_code: chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ year        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ value       : num  NA 2.03e+09 6.20e+10 NA 1.03e+09 ...
head(gdp_tidy)
##     country_name country_code year       value
## 1    Afghanistan          AFG   NA          NA
## 2        Albania          ALB   NA  2028553750
## 3        Algeria          DZA   NA 62045099643
## 4 American Samoa          ASM   NA          NA
## 5        Andorra          AND   NA  1029048482
## 6         Angola          AGO   NA 11228764963
# Reload the tidy GDP table from disk; this replaces the in-memory gdp_tidy
# object built earlier in the script.
# NOTE(review): no visible code writes "gdp_tidy.csv" -- confirm where this
# file is produced.
gdp_tidy <- read_csv("gdp_tidy.csv")
## Parsed with column specification:
## cols(
##   country_name = col_character(),
##   country_code = col_character(),
##   year = col_double(),
##   value = col_double()
## )
summary(gdp_tidy)
##  country_name       country_code            year          value          
##  Length:2387        Length:2387        Min.   :1990   Min.   :8.824e+06  
##  Class :character   Class :character   1st Qu.:2009   1st Qu.:4.435e+09  
##  Mode  :character   Mode  :character   Median :2012   Median :2.020e+10  
##                                        Mean   :2010   Mean   :3.241e+11  
##                                        3rd Qu.:2015   3rd Qu.:1.384e+11  
##                                        Max.   :2017   Max.   :1.939e+13  
##                                                       NA's   :193
str(gdp_tidy)
## tibble [2,387 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ country_name: chr [1:2387] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ country_code: chr [1:2387] "AFG" "ALB" "DZA" "ASM" ...
##  $ year        : num [1:2387] 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
##  $ value       : num [1:2387] NA 2.03e+09 6.20e+10 NA 1.03e+09 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   country_name = col_character(),
##   ..   country_code = col_character(),
##   ..   year = col_double(),
##   ..   value = col_double()
##   .. )
head(gdp_tidy)
## # A tibble: 6 x 4
##   country_name   country_code  year        value
##   <chr>          <chr>        <dbl>        <dbl>
## 1 Afghanistan    AFG           1990          NA 
## 2 Albania        ALB           1990  2028553750 
## 3 Algeria        DZA           1990 62045099643.
## 4 American Samoa ASM           1990          NA 
## 5 Andorra        AND           1990  1029048482.
## 6 Angola         AGO           1990 11228764963.
# Build the plotting table: rank the countries inside each year and keep the
# ten best-ranked rows per year.
gdp_formatted <- gdp_tidy %>%
  group_by(year) %>%
  # rank 1 = largest GDP of the year (the values are negated before ranking).
  mutate(rank = rank(-value)) %>%
  mutate(
    # GDP relative to the top-ranked country of the same year.
    Value_rel = value / value[rank == 1],
    # Bar label: GDP divided by 1e9 and rounded, with a leading space.
    Value_lbl = paste0(" ", round(value / 1e9))
  ) %>%
  ungroup() %>%
  filter(rank <= 10)

# Static bar chart that the animation is built on. Bars are drawn as tiles
# positioned by rank; the axes are flipped so the bars run horizontally, and
# the x scale is reversed so rank 1 sits at the top.
staticplot <- ggplot(
  data = gdp_formatted,
  mapping = aes(
    x = rank,
    group = country_name,
    fill = as.factor(country_name),
    color = as.factor(country_name)
  )
) +
  # One tile per country: centred at value / 2 with height value, so the bar
  # spans from 0 to value.
  geom_tile(
    mapping = aes(y = value / 2, height = value, width = 0.9),
    alpha = 0.8,
    color = NA
  ) +
  # Country name, right-aligned at the base of the bar.
  geom_text(
    mapping = aes(y = 0, label = paste(country_name, " ")),
    vjust = 0.2,
    hjust = 1
  ) +
  # Value label, left-aligned at the end of the bar.
  geom_text(mapping = aes(y = value, label = Value_lbl, hjust = 0)) +
  # clip = "off" lets the labels extend beyond the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_reverse() +
  guides(color = FALSE, fill = FALSE) +
  theme(
    # Axes: everything hidden.
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # No legend.
    legend.position = "none",
    # Panel: blank apart from thin grey grid lines on the x axis.
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(size = 0.1, color = "grey"),
    panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
    # Title, subtitle and caption styling.
    plot.title = element_text(
      size = 25, hjust = 0.5, face = "bold", color = "grey", vjust = -1
    ),
    plot.subtitle = element_text(
      size = 18, hjust = 0.5, face = "italic", color = "grey"
    ),
    plot.caption = element_text(
      size = 8, hjust = 0.5, face = "italic", color = "grey"
    ),
    plot.background = element_blank(),
    # Margins in cm; the left margin is wider than the other three.
    plot.margin = margin(t = 2, r = 2, b = 2, l = 4, unit = "cm")
  )


# Animate the static chart with one state per year. The view follows the
# data while the x range stays fixed, and the title shows the current state.
anim <- staticplot +
  transition_states(
    states = year,
    transition_length = 4,
    state_length = 1
  ) +
  view_follow(fixed_x = TRUE) +
  labs(
    title = "GDP per Year : {closest_state}",
    subtitle = "Top 10 Countries",
    caption = "GDP in Billions USD | Data Source: World Bank Data"
  )

# Render the animation as a GIF written to "gganim.gif":
# 200 frames at 20 frames per second, 1200 x 1000.
animate(
  plot = anim,
  nframes = 200,
  fps = 20,
  width = 1200,
  height = 1000,
  renderer = gifski_renderer(file = "gganim.gif")
)